1. Getting sequences from csv file
a. installing and loading packages
# Install only the packages that are not already present, so re-running the
# script does not re-download everything. phytools is listed because
# section 2 attaches it without ever installing it.
required_packages <- c(
  "seqinr", "ape", "tidyverse", "readxl", "xlsx", "phytools"
)
missing_packages <- setdiff(required_packages, rownames(installed.packages()))
if (length(missing_packages) > 0) {
  install.packages(missing_packages)
}

# Attach the packages used in section 1.
library(seqinr)
library(ape)
library(dplyr)
library(tidyverse)
library(xlsx)
b. reading sequences from file
# Every relative file name below is resolved against this directory.
setwd(dir = "/Users/amrit/Desktop/order_name")

# Load one table per marker; each is expected to hold a column of GenBank
# accession numbers (column names as used in the download calls below).
order_name_matk_num <- read.csv("order_name_matk.csv")
order_name_rbcL_num <- read.csv("order_name_rbcl.csv")
order_name_trnh_num <- read.csv("order_name_trnh.csv")
its1_num <- read.csv("order_name_its1.csv")
its2_num <- read.csv("order_name_its2.csv")

# Download the sequences for each marker from GenBank (needs network access).
order_name_matk <- read.GenBank(order_name_matk_num$matk)
order_name_rbcL <- read.GenBank(order_name_rbcL_num$rbcL)
order_name_trnh <- read.GenBank(order_name_trnh_num$trnh_psba)
its1 <- read.GenBank(its1_num$its)
its2 <- read.GenBank(its2_num$its2)
c. adding accession names to species names
# Join the "species" attribute of a downloaded sequence set to the names of
# its sequences, separated by a marker tag such as "_matK_".
label_with_accession <- function(dna, marker_tag) {
  paste(attr(dna, "species"), names(dna), sep = marker_tag)
}

matk_IDs <- label_with_accession(order_name_matk, "_matK_")
rbcL_IDs <- label_with_accession(order_name_rbcL, "_rbcL_")
trnh_IDs <- label_with_accession(order_name_trnh, "_trnh_")
its1_IDs <- label_with_accession(its1, "_its1_")
its2_IDs <- label_with_accession(its2, "_its2_")

# Echo the labels for a visual check.
matk_IDs
rbcL_IDs
trnh_IDs
its1_IDs
its2_IDs
f. writing fasta files
# Write one FASTA file per marker, with headers taken from the `speciesnames`
# column of the matching CSV table.
#
# The downloaded sequences are first written to a plain FASTA file with ape
# and read back with seqinr. This creates the *_seqinr objects that
# write.fasta() is given below; without this round-trip they do not exist.
#
# dna:       sequences returned by read.GenBank()
# seq_names: one header per sequence, in the same order
# raw_file:  intermediate FASTA file
# out_file:  final FASTA file with the new headers
# Returns the sequences as read back by seqinr, invisibly.
export_named_fasta <- function(dna, seq_names, raw_file, out_file) {
  ape::write.dna(dna, file = raw_file, format = "fasta", append = FALSE)
  seqs <- seqinr::read.fasta(
    file = raw_file, seqtype = "DNA", as.string = TRUE,
    forceDNAtolower = FALSE
  )
  # Fail loudly instead of writing headers that are missing or shifted.
  if (length(seqs) != length(seq_names)) {
    stop(
      "Cannot label ", out_file, ": ", length(seqs), " sequences but ",
      length(seq_names), " names",
      call. = FALSE
    )
  }
  seqinr::write.fasta(sequences = seqs, names = seq_names, file.out = out_file)
  invisible(seqs)
}

order_name_matk_seqinr <- export_named_fasta(
  order_name_matk, order_name_matk_num$speciesnames,
  "order_name_matk.fasta", "order_name_matk_sequences.fasta"
)
order_name_rbcL_seqinr <- export_named_fasta(
  order_name_rbcL, order_name_rbcL_num$speciesnames,
  "order_name_rbcL.fasta", "order_name_rbcL_sequences.fasta"
)
order_name_trnh_seqinr <- export_named_fasta(
  order_name_trnh, order_name_trnh_num$speciesnames,
  "order_name_trnh.fasta", "order_name_trnh_sequences.fasta"
)
its1_seqinr <- export_named_fasta(
  its1, its1_num$speciesnames,
  "its1.fasta", "its1_sequences.fasta"
)
its2_seqinr <- export_named_fasta(
  its2, its2_num$speciesnames,
  "its2.fasta", "its2_sequences.fasta"
)
2. Creating cophyloplots for each order
# use the previously installed packages
library(ape)
library(phytools)
a. reading and preparing the trees
# Read a Newick tree, assign branch lengths with compute.brlen() (default
# settings) and midpoint-root the result. Both trees go through this one
# helper so they are rooted by the same function; the original rooted one
# tree with midpoint_root() and the other with midpoint.root().
prepare_tree <- function(tree_file) {
  tree <- ape::read.tree(tree_file)
  tree <- compute.brlen(tree)
  midpoint.root(tree)
}

order_name_chloroplast_tree <- prepare_tree(
  "~/Desktop/Manuscript/order_name/order_name.tre"
)
order_name_nuclear_tree <- prepare_tree(
  "~/Desktop/Manuscript/order_name/order_name_its2.tre"
)
b. defining association matrix
# Two-column association matrix that pairs every chloroplast tip label with
# the identical label (both columns come from the chloroplast tree).
order_name_association <- local({
  tips <- order_name_chloroplast_tree$tip.label
  # deparse.level = 0 keeps the matrix free of column names.
  cbind(tips, tips, deparse.level = 0)
})
c. creating cophylo object
# Build the co-phylogeny object: chloroplast tree as the first tree, nuclear
# tree as the second, linked through the association matrix.
obj <- cophylo(
  tr1 = order_name_chloroplast_tree,
  tr2 = order_name_nuclear_tree,
  assoc = order_name_association,
  print = TRUE
)
d. plot to see how it looks
# Preview the tanglegram on the current graphics device before saving it.
# The link.*, pts and fsize arguments only apply when plotting the cophylo
# object, so `obj` is plotted here instead of the two individual trees,
# where those arguments had no effect.
# NOTE(review): rotate.multi looks like an argument of cophylo() rather than
# of plot(); it is kept to match the call used for the PDF - confirm.
plot(
  obj,
  lwd = 2, pts = 1,
  link.type = "curved",
  link.lwd = 2,
  link.lty = "solid",
  link.col = make.transparent("blue", .25),
  fsize = 1,
  rotate.multi = TRUE
)
e. save to PDF
# Save the tanglegram as a 10 x 10 inch PDF.
pdf(
  "~/Desktop/Manuscript/order_name/order_name_cophylo_plot.pdf",
  width = 10, height = 10
)
# Close the PDF device even if plotting fails; otherwise the device stays
# open and later plots are silently drawn into this file.
# NOTE(review): rotate.multi looks like an argument of cophylo() rather than
# of plot(); it is probably ignored here - confirm before relying on it.
invisible(tryCatch(
  plot(
    obj,
    lwd = 2, pts = 1,
    link.type = "curved",
    link.lwd = 2,
    link.lty = "solid",
    link.col = make.transparent("blue", 0.25),
    fsize = 1,
    rotate.multi = TRUE
  ),
  finally = dev.off()
))
LS0tCnRpdGxlOiAiQ29kZXMgdXNlZCBmb3IgZ2V0dGluZyBzZXF1ZW5jZXMgZm9yIHBoeWxvZ2VuZXRpYyB0cmVlIGFuZCBtYWtpbmcgVGFuZ2xlZ3JhbXMiCm91dHB1dDogaHRtbF9ub3RlYm9vawotLS0KCiMgMS4gR2V0dGluZyBzZXF1ZW5jZXMgZnJvbSBjc3YgZmlsZQoKIyMjIGEuIGluc3RhbGxpbmcgYW5kIGxvYWRpbmcgcGFja2FnZXMKYGBge3J9Cmluc3RhbGwucGFja2FnZXMoInNlcWluciIpCmluc3RhbGwucGFja2FnZXMoImFwZSIpCmluc3RhbGwucGFja2FnZXMoInRpZHl2ZXJzZSIpCmluc3RhbGwucGFja2FnZXMoInJlYWR4bCIpCmxpYnJhcnkoc2VxaW5yKQpsaWJyYXJ5KGFwZSkKbGlicmFyeShkcGx5cikKbGlicmFyeSh0aWR5dmVyc2UpCmluc3RhbGwucGFja2FnZXMoInhsc3giKQpsaWJyYXJ5KHhsc3gpCmBgYAoKIyMjIGIuIHJlYWRpbmcgc2VxdWVuY2VzIGZyb20gZmlsZQoKYGBge3J9CnNldHdkKGRpciA9Ii9Vc2Vycy9hbXJpdC9EZXNrdG9wL29yZGVyX25hbWUiKQoKb3JkZXJfbmFtZV9tYXRrX251bSA8LXJlYWQuY3N2KCJvcmRlcl9uYW1lX21hdGsuY3N2IikKb3JkZXJfbmFtZV9tYXRrIDwtIHJlYWQuR2VuQmFuayhvcmRlcl9uYW1lX21hdGtfbnVtJG1hdGspCgpvcmRlcl9uYW1lX3JiY0xfbnVtIDwtIHJlYWQuY3N2KCJvcmRlcl9uYW1lX3JiY2wuY3N2IikKb3JkZXJfbmFtZV9yYmNMIDwtIHJlYWQuR2VuQmFuayhvcmRlcl9uYW1lX3JiY0xfbnVtJHJiY0wpCgpvcmRlcl9uYW1lX3RybmhfbnVtIDwtIHJlYWQuY3N2KCJvcmRlcl9uYW1lX3RybmguY3N2IikKb3JkZXJfbmFtZV90cm5oIDwtIHJlYWQuR2VuQmFuayhvcmRlcl9uYW1lX3RybmhfbnVtJHRybmhfcHNiYSkKCml0czFfbnVtIDwtIHJlYWQuY3N2KCJvcmRlcl9uYW1lX2l0czEuY3N2IikKaXRzMSA8LSByZWFkLkdlbkJhbmsoaXRzMV9udW0kaXRzKQoKaXRzMl9udW0gPC0gcmVhZC5jc3YoIm9yZGVyX25hbWVfaXRzMi5jc3YiKQppdHMyIDwtIHJlYWQuR2VuQmFuayhpdHMyX251bSRpdHMyKQoKYGBgCgojIyMgYy4gYWRkaW5nIGFjY2Vzc2lvbiBuYW1lcyB0byBzcGVjaWVzIG5hbWVzCgpgYGB7cn0KbWF0a19JRHMgPC0gcGFzdGUoYXR0cihvcmRlcl9uYW1lX21hdGssICJzcGVjaWVzIiksIG5hbWVzKG9yZGVyX25hbWVfbWF0ayksIHNlcCA9ICJfbWF0S18iKQpyYmNMX0lEcyA8LSBwYXN0ZShhdHRyKG9yZGVyX25hbWVfcmJjTCwgInNwZWNpZXMiKSwgbmFtZXMob3JkZXJfbmFtZV9yYmNMKSwgc2VwID0gIl9yYmNMXyIpCnRybmhfSURzIDwtIHBhc3RlKGF0dHIob3JkZXJfbmFtZV90cm5oLCAic3BlY2llcyIpLCBuYW1lcyhvcmRlcl9uYW1lX3RybmgpLCBzZXAgPSAiX3RybmhfIikKaXRzMV9JRHMgPC0gcGFzdGUoYXR0cihpdHMxLCAic3BlY2llcyIpLCBuYW1lcyhpdHMxKSwgc2VwID0gIl9pdHMxXyIpCml0czJfSURzIDwtIHBhc3RlKGF0dHIoaXRzMiwgInNwZWNpZXMiKSwgbmFtZXMoaXRzMiksIHNlcCA9ICJfaXRzMl8iKQoKbWF0a19JRHMK
cmJjTF9JRHMKdHJuaF9JRHMKaXRzMV9JRHMKaXRzMl9JRHMKYGBgCgojIyMgZC4gd3JpdGluZyB0aGVtIGluIGZhc3RhIGZvcm1hdAoKYGBge3J9CndyaXRlLmRuYShvcmRlcl9uYW1lX21hdGssIGZpbGUgPSAib3JkZXJfbmFtZV9tYXRrLmZhc3RhIiwgZm9ybWF0ID0gImZhc3RhIiwgYXBwZW5kID0gRkFMU0UpCndyaXRlLmRuYShvcmRlcl9uYW1lX3JiY0wsIGZpbGUgPSAib3JkZXJfbmFtZV9yYmNMLmZhc3RhIiwgZm9ybWF0ID0gImZhc3RhIiwgYXBwZW5kID0gRkFMU0UpCndyaXRlLmRuYShvcmRlcl9uYW1lX3RybmgsIGZpbGUgPSAib3JkZXJfbmFtZV90cm5oLmZhc3RhIiwgZm9ybWF0ID0gImZhc3RhIiwgYXBwZW5kID0gRkFMU0UpCndyaXRlLmRuYShpdHMxLCBmaWxlID0gIml0czEuZmFzdGEiLCBmb3JtYXQgPSAiZmFzdGEiLCBhcHBlbmQgPSBGQUxTRSkKd3JpdGUuZG5hKGl0czIsIGZpbGUgPSAiaXRzMi5mYXN0YSIsIGZvcm1hdCA9ICJmYXN0YSIsIGFwcGVuZCA9IEZBTFNFKQpgYGAKCiMjIyBlLiByZXdyaXRpbmcgd2l0aCBtb3JlIGluZm9ybWF0aW9uCgpgYGB7cn0Kb3JkZXJfbmFtZV9tYXRrX3NlcWluciA8LSByZWFkLmZhc3RhKGZpbGUgPSAib3JkZXJfbmFtZV9tYXRrLmZhc3RhIiwgc2VxdHlwZSA9ICJETkEiLCBhcy5zdHJpbmcgPSBUUlVFLCBmb3JjZUROQXRvbG93ZXIgPSBGQUxTRSkKb3JkZXJfbmFtZV9yYmNMX3NlcWluciA8LSByZWFkLmZhc3RhKGZpbGUgPSAib3JkZXJfbmFtZV9yYmNMLmZhc3RhIiwgc2VxdHlwZSA9ICJETkEiLCBhcy5zdHJpbmcgPSBUUlVFLCBmb3JjZUROQXRvbG93ZXIgPSBGQUxTRSkKb3JkZXJfbmFtZV90cm5oX3NlcWluciA8LSByZWFkLmZhc3RhKGZpbGUgPSAib3JkZXJfbmFtZV90cm5oLmZhc3RhIiwgc2VxdHlwZSA9ICJETkEiLCBhcy5zdHJpbmcgPSBUUlVFLCBmb3JjZUROQXRvbG93ZXIgPSBGQUxTRSkKaXRzMV9zZXFpbnIgPC0gcmVhZC5mYXN0YShmaWxlID0gIml0czEuZmFzdGEiLCBzZXF0eXBlID0gIkROQSIsIGFzLnN0cmluZyA9IFRSVUUsIGZvcmNlRE5BdG9sb3dlciA9IEZBTFNFKQppdHMyX3NlcWluciA8LSByZWFkLmZhc3RhKGZpbGUgPSAiaXRzMi5mYXN0YSIsIHNlcXR5cGUgPSAiRE5BIiwgYXMuc3RyaW5nID0gVFJVRSwgZm9yY2VETkF0b2xvd2VyID0gRkFMU0UpCmBgYAoKIyMjIGYuIHdydGluZyBmYXN0YSBmaWxlcwoKYGBge3J9CndyaXRlLmZhc3RhKHNlcXVlbmNlcyA9IG9yZGVyX25hbWVfbWF0a19zZXFpbnIsIG5hbWVzID0gb3JkZXJfbmFtZV9tYXRrX251bSRzcGVjaWVzbmFtZXMsIGZpbGUub3V0ID0gIm9yZGVyX25hbWVfbWF0a19zZXF1ZW5jZXMuZmFzdGEiKQp3cml0ZS5mYXN0YShzZXF1ZW5jZXMgPSBvcmRlcl9uYW1lX3JiY0xfc2VxaW5yLCBuYW1lcyA9IG9yZGVyX25hbWVfcmJjTF9udW0kc3BlY2llc25hbWVzLCBmaWxlLm91dCA9ICJvcmRlcl9uYW1lX3JiY0xfc2VxdWVuY2VzLmZhc3RhIikKd3JpdGUuZmFzdGEoc2VxdWVuY2VzID0gb3JkZXJf
bmFtZV90cm5oX3NlcWluciwgbmFtZXMgPSBvcmRlcl9uYW1lX3RybmhfbnVtJHNwZWNpZXNuYW1lcywgZmlsZS5vdXQgPSAib3JkZXJfbmFtZV90cm5oX3NlcXVlbmNlcy5mYXN0YSIpCndyaXRlLmZhc3RhKHNlcXVlbmNlcyA9IGl0czFfc2VxaW5yLCBuYW1lcyA9IGl0czFfbnVtJHNwZWNpZXNuYW1lcywgZmlsZS5vdXQgPSAiaXRzMV9zZXF1ZW5jZXMuZmFzdGEiKQp3cml0ZS5mYXN0YShzZXF1ZW5jZXMgPSBpdHMyX3NlcWluciwgbmFtZXMgPSBpdHMyX251bSRzcGVjaWVzbmFtZXMsIGZpbGUub3V0ID0gIml0czJfc2VxdWVuY2VzLmZhc3RhIikKYGBgCgoKIyAyLiBDcmVhdGluZyBjb3BoeWxvcGxvdHMgZm9yIGVhY2ggb3JkZXIKCmBgYHtyfQojIHVzZSB0aGUgcHJldmlvdXNseSBpbnN0YWxsZWQgcGFja2FnZXMKbGlicmFyeShhcGUpCmxpYnJhcnkocGh5dG9vbHMpCmBgYAoKIyMjIGEuIHJlYWRpbmcgYW5kIHByZXBhcmluZyB0aGUgdHJlZXMKCmBgYHtyfQpvcmRlcl9uYW1lX2NobG9yb3BsYXN0X3RyZWUgPC0gYXBlOjpyZWFkLnRyZWUoIn4vRGVza3RvcC9NYW51c2NyaXB0L29yZGVyX25hbWUvb3JkZXJfbmFtZS50cmUiKQpvcmRlcl9uYW1lX251Y2xlYXJfdHJlZSA8LSBhcGU6OnJlYWQudHJlZSgifi9EZXNrdG9wL01hbnVzY3JpcHQvb3JkZXJfbmFtZS9vcmRlcl9uYW1lX2l0czIudHJlIikgCgpvcmRlcl9uYW1lX2NobG9yb3BsYXN0X3RyZWUgPC0gY29tcHV0ZS5icmxlbihvcmRlcl9uYW1lX2NobG9yb3BsYXN0X3RyZWUpCm9yZGVyX25hbWVfbnVjbGVhcl90cmVlIDwtIGNvbXB1dGUuYnJsZW4ob3JkZXJfbmFtZV9udWNsZWFyX3RyZWUpCgpvcmRlcl9uYW1lX2NobG9yb3BsYXN0X3RyZWUgPC0gbWlkcG9pbnRfcm9vdChvcmRlcl9uYW1lX2NobG9yb3BsYXN0X3RyZWUpCm9yZGVyX25hbWVfbnVjbGVhcl90cmVlIDwtIG1pZHBvaW50LnJvb3Qob3JkZXJfbmFtZV9udWNsZWFyX3RyZWUpCmBgYAoKIyMjIGIuIGRlZmluaW5nIGFzc29jaWF0aW9uIG1hdHJpeAoKYGBge3J9Cm9yZGVyX25hbWVfYXNzb2NpYXRpb24gPC0gY2JpbmQob3JkZXJfbmFtZV9jaGxvcm9wbGFzdF90cmVlJHRpcC5sYWJlbCwgb3JkZXJfbmFtZV9jaGxvcm9wbGFzdF90cmVlJHRpcC5sYWJlbCkKCmBgYAoKIyMjIGMuIGNyZWF0aW5nIGNvcGh5bG8gb2JqZWN0CgpgYGB7cn0Kb2JqIDwtIGNvcGh5bG8ob3JkZXJfbmFtZV9jaGxvcm9wbGFzdF90cmVlLCBvcmRlcl9uYW1lX251Y2xlYXJfdHJlZSwgYXNzb2MgPSBvcmRlcl9uYW1lX2Fzc29jaWF0aW9uLCBwcmludCA9IFRSVUUpCmBgYAoKIyMjIGQuIHBsb3QgdG8gc2VlIGhvdyBpdCBsb29rcwoKYGBge3J9CnBsb3Qob3JkZXJfbmFtZV9jaGxvcm9wbGFzdF90cmVlLCBsd2QgPSAyLCBwdHMgPSAxLCBsaW5rLnR5cGUgPSAiY3VydmVkIiwgbGluay5sd2QgPSAyLCBsaW5rLmx0eSA9ICJzb2xpZCIsIGxpbmsuY29sID0gbWFrZS50cmFuc3BhcmVudCgiYmx1ZSIsIC4yNSksIGZzaXplID0gMSwgcm90YXRlLm11
bHRpID0gVFJVRSkKcGxvdChvcmRlcl9uYW1lX251Y2xlYXJfdHJlZSwgbHdkID0gMiwgcHRzID0gMSwgbGluay50eXBlID0gImN1cnZlZCIsIGxpbmsubHdkID0gMiwgbGluay5sdHkgPSAic29saWQiLCBsaW5rLmNvbCA9IG1ha2UudHJhbnNwYXJlbnQoImJsdWUiLCAuMjUpLCBmc2l6ZSA9IDEsIHJvdGF0ZS5tdWx0aSA9IFRSVUUpCmBgYAoKIyMjIGUuIHNhdmUgdG8gUERGCgpgYGB7cn0KcGRmKCJ+L0Rlc2t0b3AvTWFudXNjcmlwdC9vcmRlcl9uYW1lL29yZGVyX25hbWVfY29waHlsb19wbG90LnBkZiIsIHdpZHRoID0gMTAsIGhlaWdodCA9IDEwKQpwbG90KG9iaiwKICAgICBsd2QgPSAyLCBwdHMgPSAxLAogICAgIGxpbmsudHlwZSA9ICJjdXJ2ZWQiLAogICAgIGxpbmsubHdkID0gMiwKICAgICBsaW5rLmx0eSA9ICJzb2xpZCIsCiAgICAgbGluay5jb2wgPSBtYWtlLnRyYW5zcGFyZW50KCJibHVlIiwgMC4yNSksCiAgICAgZnNpemUgPSAxLAogICAgIHJvdGF0ZS5tdWx0aSA9IFRSVUUpCmRldi5vZmYoKQpgYGAKCg==